/* Copyright (c) 2003 The Nutch Organization. All rights reserved. */ /* Use subject to the conditions in http://www.nutch.org/LICENSE.txt. */ package net.nutch.util; import java.io.*; import java.util.*; import java.nio.channels.*; /**************************************************************** * NutchGenericFileSystem implements the NutchFileSystem interface * and adds some generic utility methods for subclasses to use. * * The standard task any implementor of NutchFileSystem * * @author Mike Cafarella ****************************************************************/ public abstract class NutchGenericFileSystem implements NutchFileSystem { File dbRoot, localTmp, flagFile; FileInputStream lockData; FileLock lock; ShareSet shareSet; boolean destructivePut; /** * Create a Nutch Filesystem at the indicated mounted * directory. */ public NutchGenericFileSystem(File dbRoot, ShareSet shareSet, boolean destructivePut) throws IOException { if (shareSet == null) { this.shareSet = new ShareSet(dbRoot); } else { this.shareSet = shareSet; } // // 1. Create/find main work area (which will receive files from // other processes and may be shared). this.dbRoot = dbRoot; if (! dbRoot.exists()) { dbRoot.mkdirs(); } if (! dbRoot.isDirectory()) { throw new IOException("Directory " + dbRoot + " does not exist."); } // // 2. Attempt to acquire an exclusive lock on the directory. // If this succeeds, the process should then clear out the // tmp storage area. If this fails, just continue. // Vector tmpDirs = new Vector(); File rootFiles[] = dbRoot.listFiles(); for (int i = 0; i < rootFiles.length; i++) { if (rootFiles[i].isDirectory() && rootFiles[i].getName().startsWith("localtmpdir")) { tmpDirs.add(rootFiles[i]); } } // If there are any tmpDirs for us to delete, try to do it. if (tmpDirs.size() > 0) { File exclusiveLockFile = new File(dbRoot, "nutchfslock"); exclusiveLockFile.createNewFile(); FileOutputStream exclusiveLockData = new FileOutputStream(exclusiveLockFile); FileLock exclusiveLock = exclusiveLockData.getChannel().tryLock(); // Once we have the lock, go and delete them if (exclusiveLock != null) { for (Enumeration e = tmpDirs.elements(); e.hasMoreElements(); ) { FileUtil.fullyDelete((File) e.nextElement()); } exclusiveLock.release(); exclusiveLockData.close(); } } // // 3. Acquire a non-exclusive lock on the directory. Block // until this is acquired. (The only thing preventing it // would be another process in step 2.) // File lockFile = new File(dbRoot, "nutchfslock"); lockFile.createNewFile(); this.lockData = new FileInputStream(lockFile); this.lock = lockData.getChannel().lock(0L, Long.MAX_VALUE, true); // // 4. Create the tmp directory // this.localTmp = File.createTempFile("localtmpdir", "", dbRoot); this.localTmp.delete(); if (! localTmp.exists()) { localTmp.mkdirs(); } if (! localTmp.isDirectory()) { throw new IOException("Directory " + localTmp + " does not exist."); } // // 5. Create the src lock file // this.flagFile = File.createTempFile("flag", "tmp"); // // 6. Whether files should be deleted after being copied // this.destructivePut = destructivePut; } /** * Acquire a real File for a name that's not yet under NutchFS * control. This may improve performance later on when the * File is put() under NutchFS control. It's also handy for * finding a file location where there is a lot of extra room. */ public File getWorkingFile() throws IOException { File f = File.createTempFile("tmp", "", localTmp); f.delete(); return f; } /** * Wait for a NutchFile from somewhere in NutchSpace. Translate * it to a regular old filesystem File. * * The file should already be in place. So we wait until it is. */ public File get(NutchFile nutchFile) throws IOException { return get(nutchFile, -1); } /** * Wait for a NutchFile for the specified amount of time. Return null * if we don't get it before 'timeout' ms have elapsed. */ public File get(NutchFile nutchFile, long timeout) throws IOException { long startTime = System.currentTimeMillis(); int numTries = 0; ShareGroup sg = shareSet.getShareGroup(nutchFile); File target = new File(dbRoot, nutchFile.getFilename()); File completeFlag = new File(dbRoot, nutchFile.getCompleteFlagName()); while (! completeFlag.exists()) { try { if ((numTries > 0) && (timeout > 0) && (System.currentTimeMillis() - startTime > timeout)) { return null; } Thread.sleep(1000); numTries++; if (numTries > 10) { System.err.println("NutchGenericFileSystem waiting for file " + completeFlag); } } catch (InterruptedException ie) { } } return target; } /** * Obtain a lock with the given NutchFile. This might mean obtaining * locks across many different machines/filesystems. That's fine, * as long as every machine always obtains the locks in a standard * ordering. */ public void lock(NutchFile nutchFile, boolean exclusive) throws IOException { File lockFile = getWorkingFile(); lockFile.createNewFile(); put(nutchFile, lockFile, false); ShareGroup sg = shareSet.getShareGroup(nutchFile); String locations[] = sg.getLocations(); for (int i = 0; i < locations.length; i++) { String locMach = extractMachine(locations[i]); String locStr = extractPath(locations[i]); lockFile(locMach, locStr, nutchFile.getFilename(), exclusive); } } /** * Release the lock for the given NutchFile */ public void release(NutchFile nutchFile) throws IOException { ShareGroup sg = shareSet.getShareGroup(nutchFile); String locations[] = sg.getLocations(); for (int i = 0; i < locations.length; i++) { String locMach = extractMachine(locations[i]); String locStr = extractPath(locations[i]); release(locMach, locStr, nutchFile.getFilename()); } } /** * Add a single file or a directory of files to the filesystem. * If the source File is a directory, we want to reproduce * the entire directory structure, rooted at the given * NutchFile. */ public void put(NutchFile nutchFile, File workingFile, boolean overwrite) throws IOException { if (workingFile.isDirectory()) { putDir(nutchFile, workingFile, overwrite); } else { putFile(nutchFile, workingFile, overwrite); } FileUtil.fullyDelete(workingFile); } /** * Add a directory and its contents to the filesystem */ void putDir(NutchFile nutchDir, File workingDir, boolean overwrite) throws IOException { File workingFiles[] = workingDir.listFiles(); NutchFile nutchFiles[] = new NutchFile[workingFiles.length]; // // Remove target dir's completion flag // ShareGroup sg = shareSet.getShareGroup(nutchDir); String locations[] = sg.getLocations(); for (int i = 0; i < locations.length; i++) { String locMach = extractMachine(locations[i]); String locStr = extractPath(locations[i]); deleteFile(locMach, locStr, nutchDir.getCompleteFlagName()); } // // Build a list of all contained items // for (int i = 0; i < nutchFiles.length; i++) { nutchFiles[i] = new NutchFile(nutchDir, workingFiles[i].getName()); } // // Put the list to the FS // for (int i = 0; i < workingFiles.length; i++) { put(nutchFiles[i], workingFiles[i], overwrite); } // // We've written dir's contents, so write out completion flag // for (int i = 0; i < locations.length; i++) { String locMach = extractMachine(locations[i]); String locStr = extractPath(locations[i]); copyFile(flagFile, locMach, locStr, nutchDir.getCompleteFlagName(), true); } } /** * Add a single file to the filesystem. */ void putFile(NutchFile nutchFile, File workingFile, boolean overwrite) throws IOException { ShareGroup sg = shareSet.getShareGroup(nutchFile); String locations[] = sg.getLocations(); for (int i = 0; i < locations.length; i++) { String locMach = extractMachine(locations[i]); String locStr = extractPath(locations[i]); // Remove 'complete' flag deleteFile(locMach, locStr, nutchFile.getCompleteFlagName()); // Write file, if necessary. copyFile(workingFile, locMach, locStr, nutchFile.getFilename(), overwrite); // Write 'complete' flag copyFile(flagFile, locMach, locStr, nutchFile.getCompleteFlagName(), true); } } /** * Complete the given directory */ public void completeDir(NutchFile nutchFile) throws IOException { ShareGroup sg = shareSet.getShareGroup(nutchFile); String locations[] = sg.getLocations(); for (int i = 0; i < locations.length; i++) { String locMach = extractMachine(locations[i]); String locStr = extractPath(locations[i]); // Write 'complete' flag copyFile(flagFile, locMach, locStr, nutchFile.getCompleteFlagName(), true); } } /** * Take the file out of the NutchFileSystem. */ public void delete(NutchFile nutchFile) throws IOException { ShareGroup sg = shareSet.getShareGroup(nutchFile); String locations[] = sg.getLocations(); for (int i = 0; i < locations.length; i++) { String locMach = extractMachine(locations[i]); String locStr = extractPath(locations[i]); deleteFile(locMach, locStr, nutchFile.getFilename()); deleteFile(locMach, locStr, nutchFile.getCompleteFlagName()); } } /** * Rename the thing. Usually done at close. */ public void renameTo(NutchFile src, NutchFile dst) throws IOException { // Make sure src file is complete File srcFile = get(src); // Remove src complete flags ShareGroup sg = shareSet.getShareGroup(src); String locations[] = sg.getLocations(); for (int i = 0; i < locations.length; i++) { String locMach = extractMachine(locations[i]); String locStr = extractPath(locations[i]); // Remove src complete flags deleteFile(locMach, locStr, src.getCompleteFlagName()); // Rename contents renameFile(srcFile, locMach, locStr, dst.getFilename(), true); // Create target flags copyFile(flagFile, locMach, locStr, dst.getCompleteFlagName(), true); } } /** * Close down the Generic File System */ public void close() throws IOException { // Get rid of the tmp directory FileUtil.fullyDelete(localTmp); // Get rid of tmp flag file FileUtil.fullyDelete(flagFile); this.lock.release(); this.lockData.close(); } /** * To be implemented by subclasses */ protected abstract void copyFile(File srcFile, String locationMach, String locationStr, String nutchFileName, boolean overwrite) throws IOException; protected abstract void deleteFile(String locationMach, String locationStr, String nutchFileName) throws IOException; protected abstract void renameFile(File srcFile, String locationMach, String locationStr, String nutchFileName, boolean overwrite) throws IOException; protected abstract void lockFile(String locMach, String locStr, String filename, boolean exclusive) throws IOException; protected abstract void release(String locMach, String locStr, String filename) throws IOException; /** * Utility str-processing of location-string. * (format "machinename:path") */ String extractMachine(String location) { int colDex = location.indexOf(":"); if (colDex < 0) { return null; } return location.substring(0, colDex); } /** * Utility str-processing of location-string. * (format "machinename:path") */ String extractPath(String location) { int colDex = location.indexOf(":"); if (colDex < 0) { return location; } return location.substring(colDex + 1); } }